// cd /projects/data_commons/cw_code/
// qstata cw_geo_msa.do &

di "Started at $S_DATE $S_TIME"

global dir_cw = "/projects/data_commons/cw/"

tempfile ds_temp
global ds_temp = "`ds_temp'"

//==============================================================================
// 2010 MSA from Enrico Moretti

use "${dir_cw}/raw/msa_crosswalk.dta", clear

rename name_msa msa_str
sort msa fips

// Identify issues with MSA codes and export crosswalk between code and string

preserve

bysort msa msa_str: keep if _n == 1
by msa: gen n_obs = _N
list if n_obs > 1
keep msa msa_str

/*REDACTED:
MSA variable is updated for a few instances
*/

duplicates drop

save "${dir_cw}/cw_geo_msa_str", replace
saveold "${dir_cw}/cw_geo_msa_str_v12", replace v(12)
export delimited "${dir_cw}/cw_geo_msa_str.csv", replace

restore

// Export main crosswalk

/*REDACTED:
MSA variable is updated for a few instances
*/

duplicates drop

save "${dir_cw}/cw_geo_fips_msa", replace
saveold "${dir_cw}/cw_geo_fips_msa_v12", replace v(12)
export delimited "${dir_cw}/cw_geo_fips_msa.csv", replace

//==============================================================================
// 1983 MSA from Chang-Tai Hsieh

use "${dir_cw}/raw/msa1983_2010.dta", clear

/*REDACTED:
MSA1983 variable is updated for a few instances
*/

//------------------------------------------------------------------------------
// Deal with duplicates

sort fips msa2010
by fips: gen n_obs = _N
count if n_obs != 1 
list fips msa2010 msalabel2010 if n_obs != 1
rename n_obs n_obs_2010

sort fips msa1983
by fips: gen n_obs = _N
count if n_obs != 1 
list fips msa1983 msalabel1983 if n_obs != 1 
rename n_obs n_obs_1983


// Duplicates are PMSA vs MSA. Drop PMSA.

list fips msa1983 msalabel1983 if n_obs_1983 > 1 & regexm(msalabel1983, "PMSA| PM$")
drop if n_obs_1983 > 1 & regexm(msalabel1983, "PMSA| PM$")
sort fips msa1983
by fips: gen n_obs = _N
count if n_obs != 1 
list fips msa1983 msalabel1983 if n_obs != 1

/*REDACTED
Certain combinations of fips, msa1983 in the xwalk are dropped 
// Manually select some MSA chosen by Hsieh
*/

// Add some msa label
sort msa1983 msalabel1983
by msa1983: replace msalabel1983 = msalabel1983[_N] if msalabel1983 == ""

keep fips msa1983 msalabel1983
duplicates drop

save ${ds_temp}_83, replace

//------------------------------------------------------------------------------
// Merge with 2010 MSA

use "${dir_cw}/cw_geo_fips_msa", clear
merge 1:1 fips using "${ds_temp}_83", gen(m_msa1983)

list if m_msa1983 == 2
list if fips == 12086 // Hsieh usually fixes this when reading raw data.

gen st_str = msa_str if msa < 60
gen st = substr(msa_str,-2,.) if msa >= 60

merge m:1 st_str using "${dir_cw}/cw_geo_state", gen(m_st_str) keep(master match)
merge m:1 st using "${dir_cw}/cw_geo_state", gen(m_st) update keep(master match match_update match_conflict)

replace msalabel1983 = st_str if msa1983 == .
replace msa1983 = st_msa if msa1983 == .

keep fips msa1983 msalabel1983
rename msalabel1983 msa1983_str
list if msa1983 == .

/* REDACTED
Additional manual fixes to the msa1983 variable 
// Some additional manual fix:
*/
save "${dir_cw}/cw_geo_fips_msa1983", replace
saveold "${dir_cw}/cw_geo_fips_msa1983_v12", replace v(12)
export delimited "${dir_cw}/cw_geo_fips_msa1983.csv", replace

di "Ended at $S_DATE $S_TIME"
// End of do file
